GSE149524

there’re 4 batches

two P21 replicates for neuron atlas

one E15.5 and one E18.5 for trajectory analysis

load dependancies

load 10x data

GEX <- Read10X(data.dir = "../ref.GSE149524/GSE149524_RAW/GSM4504450_P21_1/")

check datasets

dim(GEX)
## [1] 27998  4920
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCACAGTCGCAC-1 AAACCCAGTAGGTACG-1 AAACCCATCGAACTCA-1
## Xkr4                     1                  1                  2
## Gm1992                   .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Rp1.1                    .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCATCGCCTCTA-1 AAACGAAAGATGCAGC-1 AAACGAACATAAGCGG-1
## Xkr4                     1                  .                  .
## Gm1992                   .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Rp1.1                    .                  .                  .
## Sox17                    .                  .                  .

GEX

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "P21.rep1")
GEX.seur
## An object of class Seurat 
## 17943 features across 4920 samples within 1 assay 
## Active assay: RNA (17943 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
grep("^Rps|^Rpl",rownames(GEX),value = T)
##   [1] "Rpl7"       "Rpl31"      "Rpl37a"     "Rps6kc1"    "Rpl7a"     
##   [6] "Rpl12"      "Rpl35"      "Rps21"      "Rpl39"      "Rpl10"     
##  [11] "Rps4x"      "Rps6ka6"    "Rpl36a"     "Rps6ka3"    "Rpl22l1"   
##  [16] "Rps3a1"     "Rps27"      "Rpl34"      "Rps20"      "Rps6"      
##  [21] "Rps8"       "Rps6ka1"    "Rpl11"      "Rpl22"      "Rpl9"      
##  [26] "Rpl5"       "Rplp0"      "Rpl6"       "Rpl21"      "Rpl32"     
##  [31] "Rps9"       "Rpl28"      "Rps5"       "Rps19"      "Rps16"     
##  [36] "Rps11"      "Rpl13a"     "Rpl18"      "Rps17"      "Rps3"      
##  [41] "Rpl27a"     "Rps13"      "Rps15a"     "Rplp2"      "Rps12"     
##  [46] "Rps15"      "Rpl6l"      "Rpl41"      "Rps26"      "Rpl18a"    
##  [51] "Rps18-ps3"  "Rps26-ps1"  "Rpl13"      "Rpl21-ps4"  "Rpl15"     
##  [56] "Rps24"      "Rpl23a-ps3" "Rpl13-ps3"  "Rps2-ps6"   "Rps25"     
##  [61] "Rpl10-ps3"  "Rplp1"      "Rpl4"       "Rps27l"     "Rpl29"     
##  [66] "Rps27rt"    "Rpsa"       "Rpl14"      "Rps27a"     "Rpl26"     
##  [71] "Rpl23a"     "Rpl9-ps1"   "Rps6kb1"    "Rpl23"      "Rpl19"     
##  [76] "Rpl27"      "Rpl38"      "Rps23"      "Rpl36-ps3"  "Rps7"      
##  [81] "Rpl10l"     "Rps29"      "Rpl36al"    "Rps6kl1"    "Rps6ka5"   
##  [86] "Rpl37"      "Rpl30"      "Rpl7a-ps3"  "Rpl8"       "Rpl3"      
##  [91] "Rps19bp1"   "Rpl39l"     "Rpl35a"     "Rpl24"      "Rps6ka2"   
##  [96] "Rps2"       "Rpl3l"      "Rps10"      "Rpl10a"     "Rps28"     
## [101] "Rps18"      "Rpl7l1"     "Rpl36"      "Rpl7a-ps5"  "Rpl36-ps4" 
## [106] "Rpl27-ps3"  "Rps14"      "Rpl17"      "Rps6kb2"    "Rps6ka4"   
## [111] "Rpl9-ps6"   "Rpl13a-ps1" "Rps12-ps3"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

plota1 <- FeatureScatter(GEX.seur, feature1 = "nFeature_RNA", feature2 = "percent.mt") 
plotb1 <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc1 <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota1 + plotb1 + plotc1

filtering

GEX.seur <- subset(GEX.seur, subset = nFeature_RNA > 1000 & nFeature_RNA < 8000 & nCount_RNA < 60000 & percent.mt < 10)
GEX.seur
## An object of class Seurat 
## 17943 features across 3160 samples within 1 assay 
## Active assay: RNA (17943 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

plota1 <- FeatureScatter(GEX.seur, feature1 = "nFeature_RNA", feature2 = "percent.mt") 
plotb1 <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc1 <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota1 + plotb1 + plotc1

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
## Warning: The following features are not present in the object: E2f8, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: Pimreg, Jpt1, not
## searching for symbol synonyms
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), #group.by = "FB.info", 
    ncol = 2, pt.size = 0.01)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

nearly no cycling !

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 100)
##   [1] "Gal"           "Cartpt"        "Vip"           "Calcb"        
##   [5] "Penk"          "Sst"           "Pcp4l1"        "Cck"          
##   [9] "Krt19"         "Adgrg6"        "Apoe"          "Nnat"         
##  [13] "Scgn"          "Nefl"          "Npy"           "Nmu"          
##  [17] "Grp"           "Ntng1"         "Crabp1"        "Csrp2"        
##  [21] "Sdpr"          "Cbln2"         "Bglap"         "Calb1"        
##  [25] "Mt3"           "Dbh"           "Ucn3"          "Sparc"        
##  [29] "Myl1"          "Ly6c1"         "Zfp804a"       "Cpne4"        
##  [33] "Cdkn1c"        "Mgat4c"        "Tac1"          "Tspan8"       
##  [37] "Nefm"          "Pcdh10"        "Ifitm3"        "Abca9"        
##  [41] "Ifi27l2a"      "Ebf1"          "Ano2"          "Paip2b"       
##  [45] "Nxph2"         "Serpine2"      "Serpini1"      "Adcyap1"      
##  [49] "Nog"           "Neurod6"       "Bglap2"        "Slc17a6"      
##  [53] "Tmeff2"        "Pnoc"          "Fst"           "Isg15"        
##  [57] "Cntn5"         "Skap1"         "Fxyd1"         "Igfbp7"       
##  [61] "Pdyn"          "Galr1"         "Grin3a"        "Itih5"        
##  [65] "Col3a1"        "Gad2"          "Rprml"         "Hspa1a"       
##  [69] "Cox8b"         "Dapk2"         "A330069E16Rik" "Rarres2"      
##  [73] "Pmepa1"        "Pcp4"          "Cckar"         "Gsn"          
##  [77] "AI593442"      "Col12a1"       "Lgals3"        "Rbp4"         
##  [81] "Arpc1b"        "Ndufa4l2"      "Phlda1"        "Ntrk3"        
##  [85] "Entpd2"        "Avil"          "Cd24a"         "Layn"         
##  [89] "Efr3a"         "Oas1d"         "Matn2"         "Gng11"        
##  [93] "Dgkg"          "Agrp"          "Col9a2"        "Vim"          
##  [97] "Calca"         "Scn7a"         "Gfap"          "Col18a1"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|Egr1|RP23-|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AI|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


GEX.seur <- RunPCA(GEX.seur, features = setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) ))
## PC_ 1 
## Positive:  Prph, Bex2, Pcsk1n, Snhg11, Ywhah, Tubb2b, Resp18, Tubb3, Mllt11, Nsg1 
##     Snrpn, Gap43, Rab3c, Scg2, Phox2a, Ndn, Zcchc12, Aplp1, Fabp5, Fxyd7 
##     Nrsn1, Tm4sf4, Tagln3, Fgf13, Lix1, Atp1a1, Ptprn, Map1b, Chga, Tm4sf1 
## Negative:  Apoe, Rarres2, Fxyd1, Col12a1, Sparc, Col18a1, Lpar1, Entpd2, Gsn, Sdc4 
##     Pmepa1, Nid1, Plp1, Atp1a2, Col3a1, Ifitm3, Tmprss5, Gpr37l1, Gpm6b, Timp3 
##     Cmtm5, Nkain4, Ttyh1, Qk, S100b, Dbi, Serpinh1, Arpc1b, Foxd3, Scn7a 
## PC_ 2 
## Positive:  Etv1, Vip, Auts2, Tbx3, Gal, Nos1, Rprml, Fxyd5, Tesc, Tmem176a 
##     Alcam, Dgkb, Rit2, Cartpt, Fam89a, Tspan13, C1ql1, S100a11, Cadps2, Ltk 
##     Tmem108, Tmem130, Stmn1, Kitl, Map1b, Stxbp6, Cpne2, Dgkg, Arhgap15, Slc35d3 
## Negative:  Dmkn, Calb2, Satb1, Oprk1, Tshz2, Chgb, Ly6e, Brinp2, Dsc3, Rab3b 
##     Ndufa4l2, Tac1, Slc18a3, Il11ra1, Ffar3, Pi15, Plcxd3, Prmt8, Folr1, Sncb 
##     Casz1, Ly6c1, Necab2, Aqp1, Syt6, Rgs4, Rgcc, Ifitm2, Tpd52l1, Tm4sf4 
## PC_ 3 
## Positive:  Tmeff2, Myl1, Id4, Avil, Ntrk3, Pcdh10, Nnat, Pbx3, Mgat4c, Serpini1 
##     Spock3, Mt3, Nrsn2, Klhl1, Adra2a, Rab27b, Fam19a1, Tbx2, Amigo2, Scgn 
##     Cd24a, Snx7, Rph3a, Kcnk2, Nefl, Cck, Abhd11os, Enc1, Tmeff1, Nefm 
## Negative:  S100a16, Nos1, Rprml, S100a4, Ass1, Ncald, C1ql1, Gal, Kitl, Cygb 
##     Stxbp6, Npy, Qdpr, Adamts5, Cox8b, Etv1, Tes, Bglap2, Mfsd4, Kcnab1 
##     Tmem255b, Rbp4, Bglap, Ppp1r14c, Fxyd5, Arhgap15, Cadps2, R3hcc1, Nxn, Abcb1b 
## PC_ 4 
## Positive:  Kcnip4, Cd24a, Gda, Htr2b, Pcp4, Nrsn2, Ptn, Skap1, Socs2, Chchd10 
##     Lin7a, Penk, Ogfrl1, Stmn1, Kctd8, Atp1a3, Smyd3, Skap2, Grem2, Tmie 
##     Mrap2, Csmd3, Fhad1, Rogdi, Edil3, Pgm5, Tac1, Ccdc109b, Spock1, Sphkap 
## Negative:  Nog, Syt15, Ano2, Cpne4, Krt19, Dapk2, Grin3a, Nmu, Gpr149, Slc25a48 
##     Edn1, Cysltr2, Dgkg, Npas1, P2rx2, Adora1, Calcb, Zfp804a, Scube1, Atoh8 
##     Cidea, Dlx3, Hpca, Layn, Tcf7l2, Pdzrn4, Adgrg6, Syt2, Slc4a4, Cbln2 
## PC_ 5 
## Positive:  Ddah1, Nefl, Klhl1, Cck, Basp1, Nefm, March1, Ucn3, Slc17a6, Pbx3 
##     Cdh9, Scgn, Ecel1, Vwc2l, Mgat4c, Sema5a, Prune2, Kcng1, Bdnf, Brinp3 
##     Lgals3, Grm5, Lxn, Pcdh7, Apela, Tm4sf1, Zbbx, Rasgef1b, Enc1, Rasgrf1 
## Negative:  Gda, Ndst4, Grem2, Htr2b, Klhdc8a, S100a13, S100a1, Cd79a, Cst12, Tac1 
##     Pgm5, Scn11a, Abca5, Necab1, Pdlim3, Cntn5, Cbln2, Syt15, Bend5, Zfp804a 
##     Ptger3, Rab3b, Hpgd, Epha5, Lingo2, Nog, Nxph1, Asic4, Ptn, Crtac1
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG, CC_gene) ))
## [1] 1372
head(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG, CC_gene) ),300)
##   [1] "Gal"      "Cartpt"   "Vip"      "Calcb"    "Penk"     "Sst"     
##   [7] "Pcp4l1"   "Cck"      "Krt19"    "Adgrg6"   "Apoe"     "Nnat"    
##  [13] "Scgn"     "Nefl"     "Npy"      "Nmu"      "Grp"      "Ntng1"   
##  [19] "Crabp1"   "Csrp2"    "Sdpr"     "Cbln2"    "Bglap"    "Calb1"   
##  [25] "Mt3"      "Dbh"      "Ucn3"     "Sparc"    "Myl1"     "Ly6c1"   
##  [31] "Zfp804a"  "Cpne4"    "Cdkn1c"   "Mgat4c"   "Tac1"     "Tspan8"  
##  [37] "Nefm"     "Pcdh10"   "Ifitm3"   "Abca9"    "Ifi27l2a" "Ebf1"    
##  [43] "Ano2"     "Paip2b"   "Nxph2"    "Serpine2" "Serpini1" "Adcyap1" 
##  [49] "Nog"      "Neurod6"  "Bglap2"   "Slc17a6"  "Tmeff2"   "Pnoc"    
##  [55] "Fst"      "Isg15"    "Cntn5"    "Skap1"    "Fxyd1"    "Igfbp7"  
##  [61] "Pdyn"     "Galr1"    "Grin3a"   "Itih5"    "Col3a1"   "Gad2"    
##  [67] "Rprml"    "Cox8b"    "Dapk2"    "Rarres2"  "Pmepa1"   "Pcp4"    
##  [73] "Cckar"    "Gsn"      "Col12a1"  "Lgals3"   "Rbp4"     "Arpc1b"  
##  [79] "Ndufa4l2" "Phlda1"   "Ntrk3"    "Entpd2"   "Avil"     "Cd24a"   
##  [85] "Layn"     "Efr3a"    "Oas1d"    "Matn2"    "Gng11"    "Dgkg"    
##  [91] "Agrp"     "Col9a2"   "Vim"      "Calca"    "Scn7a"    "Gfap"    
##  [97] "Col18a1"  "Moxd1"    "Tcap"     "C1ql1"    "Nos1"     "Gpr149"  
## [103] "Cidea"    "Camp"     "Etv1"     "Fabp7"    "Lpar1"    "Htr3a"   
## [109] "Syt15"    "Tesc"     "Brinp3"   "Pkib"     "Upp1"     "Tulp3"   
## [115] "Robo2"    "Ass1"     "Timp4"    "Kcnb2"    "Edn1"     "Igfbp4"  
## [121] "Cst12"    "Id3"      "Sparcl1"  "Phgdh"    "Hoxb7"    "Postn"   
## [127] "Calb2"    "Vstm2a"   "Vsnl1"    "Lepr"     "Fam122b"  "Plp1"    
## [133] "Islr2"    "Fgl2"     "C1ql2"    "Sostdc1"  "Ifit3"    "Krtdap"  
## [139] "Prune2"   "Nrip3"    "Iigp1"    "Sfrp1"    "Snca"     "S100a11" 
## [145] "Gpr85"    "Gng2"     "Gna14"    "Kcna1"    "Thy1"     "Id1"     
## [151] "Rerg"     "Htr2b"    "Serpinh1" "Ctla2a"   "Gpm6b"    "Sfrp5"   
## [157] "Dbi"      "Npas1"    "Spink8"   "Grm5"     "Id4"      "Exosc2"  
## [163] "Th"       "Ecel1"    "Nid1"     "Tmc3"     "Cdh9"     "Col11a1" 
## [169] "Spock3"   "Fam89a"   "Timp3"    "Col14a1"  "Rph3a"    "Ngfr"    
## [175] "Gpr37l1"  "Itm2a"    "Omp"      "Pbx3"     "Nkain4"   "Rab27b"  
## [181] "Gng8"     "Fam159b"  "Atp1a2"   "Plac8"    "Kcnk2"    "Rad51ap2"
## [187] "Fxyd3"    "Akap7"    "Pcdh9"    "Sdc4"     "Nrp2"     "Abhd11os"
## [193] "Otof"     "Tmprss5"  "Alcam"    "Scn11a"   "S100b"    "Otor"    
## [199] "Tmem35"   "Sema5a"   "Mgll"     "Tagln"    "Cysltr2"  "Basp1"   
## [205] "Ifi203"   "Tlx2"     "Cxcl10"   "Ddah1"    "Klhl1"    "Ghrh"    
## [211] "Poc1a"    "Gfral"    "Fxyd5"    "Hpca"     "H2-Q2"    "Cav1"    
## [217] "Gda"      "Cmtm5"    "Mal"      "Bdnf"     "Adora1"   "Foxd3"   
## [223] "Nrn1"     "Ifit1"    "Pdzrn4"   "Resp18"   "Art3"     "Npy5r"   
## [229] "Peg10"    "Oas1a"    "Nol4l"    "Fbxo2"    "P2rx2"    "Pcsk1"   
## [235] "Plxna4"   "Cpne8"    "Gsta4"    "Stxbp6"   "Tbx2"     "Mrap2"   
## [241] "Bmp5"     "Lhfpl2"   "Hoxa7"    "Islr"     "Col4a1"   "Pdlim2"  
## [247] "Sag"      "Nrsn2"    "Ttyh1"    "Hoxb8"    "Wif1"     "Syt2"    
## [253] "Aplf"     "Kit"      "Fam64a"   "Slc6a4"   "Col1a2"   "Col20a1" 
## [259] "Papss2"   "Gbp2"     "Hes1"     "Ttr"      "Kcnip4"   "Enc1"    
## [265] "Trp53i11" "Id2"      "Snx7"     "Itgb6"    "Ush1c"    "Slc25a48"
## [271] "Pou3f1"   "Cst3"     "Rprm"     "Vcan"     "Xist"     "Vcam1"   
## [277] "Nhlh2"    "Ltk"      "Sncg"     "Tbx3"     "Cygb"     "Hopx"    
## [283] "Pkp3"     "Cnr1"     "Rbp1"     "F2r"      "Ssu2"     "Cd79a"   
## [289] "Fbln2"    "Myl9"     "Fam46a"   "Txnip"    "Parm1"    "Serpinf1"
## [295] "Fjx1"     "Cpxm2"    "Col6a3"   "Synpr"    "Kcnv1"    "Cx3cr1"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:24
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param =20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3160
## Number of edges: 108327
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8251
## Number of communities: 23
## Elapsed time: 0 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity=100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 135)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 21:34:10 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:34:10 Read 3160 rows and found 24 numeric columns
## 21:34:10 Using Annoy for neighbor search, n_neighbors = 20
## 21:34:10 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:34:11 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpqUTGyU\file4be839272be3
## 21:34:11 Searching Annoy index using 1 thread, search_k = 2000
## 21:34:11 Annoy recall = 100%
## 21:34:12 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 21:34:13 Initializing from normalized Laplacian + noise (using irlba)
## 21:34:13 Commencing optimization for 500 epochs, with 83610 positive edges
## 21:34:21 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

markers.neur <- list(PEMN=c("Chat","Tac1","Drd2"),
                     PIMN=c("Nos1","Vip","Adm","Lgr5"),
                     PIN=c("Penk","Prlr","Mtnr1a"),
                     PSN=c("Calca","Calcb","Cck","Bdnf",
                           "Piezo2","Nog","Nmu","Sst","Il4ra",
                           "Il13ra1","Il7"),
                     PSVN=c("Glp2r","Fst","Csf2rb","Csf2rb2"),
                     Glia=c("Plp1","Gfap","Rxrg"),  # add three more
                     mosue=c("Fos","Actl6a","Actl6b","Phox2b","Sox10","Mki67","Top2a")  # Baf53
                     )
unlist(markers.neur)
##     PEMN1     PEMN2     PEMN3     PIMN1     PIMN2     PIMN3     PIMN4      PIN1 
##    "Chat"    "Tac1"    "Drd2"    "Nos1"     "Vip"     "Adm"    "Lgr5"    "Penk" 
##      PIN2      PIN3      PSN1      PSN2      PSN3      PSN4      PSN5      PSN6 
##    "Prlr"  "Mtnr1a"   "Calca"   "Calcb"     "Cck"    "Bdnf"  "Piezo2"     "Nog" 
##      PSN7      PSN8      PSN9     PSN10     PSN11     PSVN1     PSVN2     PSVN3 
##     "Nmu"     "Sst"   "Il4ra" "Il13ra1"     "Il7"   "Glp2r"     "Fst"  "Csf2rb" 
##     PSVN4     Glia1     Glia2     Glia3    mosue1    mosue2    mosue3    mosue4 
## "Csf2rb2"    "Plp1"    "Gfap"    "Rxrg"     "Fos"  "Actl6a"  "Actl6b"  "Phox2b" 
##    mosue5    mosue6    mosue7 
##   "Sox10"   "Mki67"   "Top2a"
DotPlot(GEX.seur, features = as.vector(unlist(markers.neur)), group.by = "seurat_clusters") + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: Drd2

DoubletFinder

library(DoubletFinder)
## 载入需要的程辑包:KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 载入需要的程辑包:ROCR

## NULL
## [1] "Creating 1053 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Creating 1053 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."

markers

figure2

check.fig2 <- list(Neurotransmission=c("Oprk1","Adrb2","Cckar","Htr2b",
                                       "Gucy2g","Galr1","Vipr2","Grin3a",
                                       "Adora1","Crhr2","Chrna2","Tacr3",
                                       "Gabrg3","Nmur2","Grm5","P2ry6",
                                       "Galr2","Sstr2","Gabre","Npy5r",
                                       "Npy1r","Sstr5"),
                   Othersignaling=c("Cxcl12","Fgfr2","Ntrk2","Egfr",
                                    "Nmbr","Ptgfr","Pgf","Edn1",
                                    "Kit","Prokr2","Islr2","Gfral",
                                    "Mc4r","Bdnf","Kitl","Gfra1",
                                    "Tgfbr2","Ednra","Prokr1","Bmpr1b"),
                   Ionchannels=c("Kcns3","Ano10","Kcnip4","Kcnip1",
                                 "Kcnn2","Kcnn3","Ano2","Ano8",
                                 "Kcna2","Scn1a","Kcna5","Kcnab1",
                                 "Cacna1i","Kcnd2","Kcnv1",
                                 "Cacng5","Piezo2","Piezo1"),   # Peizo1 manually added 
                   Adhesionmolecules=c("Ly6c1","Itgb5","Sema3e","Ntn1",
                                       "Slitrk4","Itga6","Cdh8","Ptpru",
                                       "Itgb6","Unc5b","Avil","Sema5a",
                                       "Epha8","Cdh7","Itga1","Ephb6",
                                       "Flrt2","Nxph2","Ntng1"),
                   Transcriptionfactors=c("Satb1","Ebf3","Bnc2","Nfatc1",
                                          "Zfp503","Satb2","Cux2","Dlx3",
                                          "Atoh8","Zfp804a","Ebf2","Pbx3",
                                          "Meis1","Etv1","St18","Ebf1",
                                          "Neurod6","Trps1","Zfp800","Onecut2",
                                          "Phox2a","Phox2b"),
                   AnnexinsCopines=c("Anxa4","Anxa3","Anxa11","Anxa2",
                                     "Anxa5","Anxa6","Anxa7","Cpne4",
                                     "Cpne5","Cpne7","Cpne2","Cpne3",
                                     "Cpne8"))
names.fig2 <- names(check.fig2)
check.fig2
## $Neurotransmission
##  [1] "Oprk1"  "Adrb2"  "Cckar"  "Htr2b"  "Gucy2g" "Galr1"  "Vipr2"  "Grin3a"
##  [9] "Adora1" "Crhr2"  "Chrna2" "Tacr3"  "Gabrg3" "Nmur2"  "Grm5"   "P2ry6" 
## [17] "Galr2"  "Sstr2"  "Gabre"  "Npy5r"  "Npy1r"  "Sstr5" 
## 
## $Othersignaling
##  [1] "Cxcl12" "Fgfr2"  "Ntrk2"  "Egfr"   "Nmbr"   "Ptgfr"  "Pgf"    "Edn1"  
##  [9] "Kit"    "Prokr2" "Islr2"  "Gfral"  "Mc4r"   "Bdnf"   "Kitl"   "Gfra1" 
## [17] "Tgfbr2" "Ednra"  "Prokr1" "Bmpr1b"
## 
## $Ionchannels
##  [1] "Kcns3"   "Ano10"   "Kcnip4"  "Kcnip1"  "Kcnn2"   "Kcnn3"   "Ano2"   
##  [8] "Ano8"    "Kcna2"   "Scn1a"   "Kcna5"   "Kcnab1"  "Cacna1i" "Kcnd2"  
## [15] "Kcnv1"   "Cacng5"  "Piezo2"  "Piezo1" 
## 
## $Adhesionmolecules
##  [1] "Ly6c1"   "Itgb5"   "Sema3e"  "Ntn1"    "Slitrk4" "Itga6"   "Cdh8"   
##  [8] "Ptpru"   "Itgb6"   "Unc5b"   "Avil"    "Sema5a"  "Epha8"   "Cdh7"   
## [15] "Itga1"   "Ephb6"   "Flrt2"   "Nxph2"   "Ntng1"  
## 
## $Transcriptionfactors
##  [1] "Satb1"   "Ebf3"    "Bnc2"    "Nfatc1"  "Zfp503"  "Satb2"   "Cux2"   
##  [8] "Dlx3"    "Atoh8"   "Zfp804a" "Ebf2"    "Pbx3"    "Meis1"   "Etv1"   
## [15] "St18"    "Ebf1"    "Neurod6" "Trps1"   "Zfp800"  "Onecut2" "Phox2a" 
## [22] "Phox2b" 
## 
## $AnnexinsCopines
##  [1] "Anxa4"  "Anxa3"  "Anxa11" "Anxa2"  "Anxa5"  "Anxa6"  "Anxa7"  "Cpne4" 
##  [9] "Cpne5"  "Cpne7"  "Cpne2"  "Cpne3"  "Cpne8"
lapply(check.fig2, length)
## $Neurotransmission
## [1] 22
## 
## $Othersignaling
## [1] 20
## 
## $Ionchannels
## [1] 18
## 
## $Adhesionmolecules
## [1] 19
## 
## $Transcriptionfactors
## [1] 22
## 
## $AnnexinsCopines
## [1] 13
names.fig2
## [1] "Neurotransmission"    "Othersignaling"       "Ionchannels"         
## [4] "Adhesionmolecules"    "Transcriptionfactors" "AnnexinsCopines"
DotPlot(GEX.seur, features = check.fig2$Neurotransmission, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1])

DotPlot(GEX.seur, features = check.fig2$Othersignaling, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2])

DotPlot(GEX.seur, features = check.fig2$Ionchannels, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3])

DotPlot(GEX.seur, features = check.fig2$Adhesionmolecules, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4])

DotPlot(GEX.seur, features = check.fig2$Transcriptionfactors, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5])

DotPlot(GEX.seur, features = check.fig2$AnnexinsCopines, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6])

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(0,2,15,  # ENC1
                                            6,3,     # ENC2
                                            8,5,     # ENC3
                                            22,      # ENC4
                                            21,      # ENC5
                                            19,      # ENC6
                                            12,17,   # ENC7
                                            1,       # ENC8
                                            7,9,13,  # ENC9
                                            10,      # ENC10
                                            18,      # ENC11
                                            14,      # ENC12
                                            
                                            16,      # Glia1
                                            4,       # Glia2
                                            11,      # Glia3
                                            20       # Glia4
                                            ))

GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno[GEX.seur$preAnno %in% c(0,2,15)] <- "ENC1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(6,3)] <- "ENC2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(8,5)] <- "ENC3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(22)] <- "ENC4"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(21)] <- "ENC5"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(19)] <- "ENC6"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(12,17)] <- "ENC7"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(1)] <- "ENC8"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(7,9,13)] <- "ENC9"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(10)] <- "ENC10"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(18)] <- "ENC11"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(14)] <- "ENC12"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(16)] <- "Glia1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(4)] <- "Glia2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(11)] <- "Glia3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(20)] <- "Glia4"

GEX.seur$preAnno <- factor(GEX.seur$preAnno,
                           levels = c(paste0("ENC",1:12),
                                      paste0("Glia",1:4)))
DotPlot(GEX.seur, features = check.fig2$Neurotransmission, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1])

DotPlot(GEX.seur, features = check.fig2$Othersignaling, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2])

DotPlot(GEX.seur, features = check.fig2$Ionchannels, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3])

DotPlot(GEX.seur, features = check.fig2$Adhesionmolecules, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4])

DotPlot(GEX.seur, features = check.fig2$Transcriptionfactors, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5])

DotPlot(GEX.seur, features = check.fig2$AnnexinsCopines, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6])

cowplot::plot_grid(
DotPlot(GEX.seur, features = check.fig2$Neurotransmission, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1]),

DotPlot(GEX.seur, features = check.fig2$Othersignaling, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2]),

DotPlot(GEX.seur, features = check.fig2$Ionchannels, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3]),

DotPlot(GEX.seur, features = check.fig2$Adhesionmolecules, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4]),

DotPlot(GEX.seur, features = check.fig2$Transcriptionfactors, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5]),

DotPlot(GEX.seur, features = check.fig2$AnnexinsCopines, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6]),
ncol = 2)

cowplot::plot_grid(
DotPlot(GEX.seur, features = check.fig2$Neurotransmission, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1]),

DotPlot(GEX.seur, features = check.fig2$Othersignaling, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2]),

DotPlot(GEX.seur, features = check.fig2$Ionchannels, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3]),

DotPlot(GEX.seur, features = check.fig2$Adhesionmolecules, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4]),

DotPlot(GEX.seur, features = check.fig2$Transcriptionfactors, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5]),

DotPlot(GEX.seur, features = check.fig2$AnnexinsCopines, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6]),
ncol = 2)

DotPlot(GEX.seur, features = as.vector(unlist(markers.neur)), group.by = "sort_clusters") + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: Drd2

DotPlot(GEX.seur, features = as.vector(unlist(markers.neur)), group.by = "preAnno") + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: Drd2

color.pre <- c("#8AB6CE","#678BB1","#3975C1","#4CC1BD",
               "#C03778","#97BA59","#DFAB16","#BF7E6B",
               "#D46B35","#BDAE8D","#BD66C4","#2BA956",
               "#FF8080","#FF8080","#FF8080","#FF0000")

#saveRDS(GEX.seur,"GSE149524.P21_rep1.initial.rds")